#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
@date: 2011-11-16
@author: shell.xu
'''
import os, sys, getopt
import segment

# Path of the dictionary db file used by the db commands below.
# main() rebinds this global when the -d option is given.
dbname = 'frq.db'

def create(filepath):
    ''' create filepath : create db form txt dict. '''
    # The dictdb is constructed with no db path argument, filled from the
    # text dictionary at `filepath`, then saved to the path in `dbname`.
    fresh_db = segment.dictdb()
    fresh_db.importtxt(filepath)
    fresh_db.savefile(dbname)

def importdb(filepath):
    ''' importdb filepath : import txt dict to db. '''
    # Unlike create(), this opens the db named by `dbname`, imports the text
    # dictionary into it and then calls sync() on it.
    store = segment.dictdb(dbname)
    store.importtxt(filepath)
    store.sync()

def exportdb(filepath):
    ''' exportdb filepath : export txt dict from db. '''
    store = segment.dictdb(dbname)
    # The `with` block closes the output file even if the export raises.
    with open(filepath, 'w') as out:
        store.exporttxt(out)

def add(word, frq):
    ''' add word frq : add a new word with frequency. '''
    store = segment.dictdb(dbname)
    # `word` is a UTF-8 byte string and `frq` a numeric string; convert both
    # before handing them to the db.
    text = word.decode('utf-8')
    value = float(frq)
    store.add(text, value)
    store.sync()

def remove(word):
    ''' remove word : remove word. '''
    store = segment.dictdb(dbname)
    # `word` is a UTF-8 byte string; the db is handed the decoded form.
    text = word.decode('utf-8')
    store.remove(text)
    store.sync()

def lookup(word):
    ''' lookup word : lookup word from dict. '''
    ddb = segment.dictdb(dbname)
    word = word.decode('utf-8')
    if len(word) == 1: print word, ddb.gets(word)
    else: print word, ddb.get(word)

def cals(word):
    ''' cals word : calculate word frequency as char set. '''
    ddb = segment.dictdb(dbname)
    word = word.decode('utf-8')
    print word, ddb.cals(word)

def stat():
    ''' stat : dict db statistic. '''
    ddb = segment.dictdb(dbname)
    # Materialise the values once; count, sum and max all reuse this list
    # instead of walking ddb.values() three separate times.
    values = list(ddb.values())
    count, sumval = len(values), sum(values)
    print('count: %d\nsumvalue: %f' % (count, sumval))
    if count == 0:
        # Nothing to average or maximise: stop here rather than fail with
        # ZeroDivisionError / max() of an empty sequence.
        return
    # float() keeps the average exact even if the stored values are ints.
    print('avgvalue: %f\nmaxvalue: %f' % (float(sumval) / count, max(values)))
    roots = len(ddb.db)
    # NOTE(review): assumes a db with at least one value has at least one
    # root entry in ddb.db -- confirm against segment.dictdb.
    print('root: %d\navgdict: %f' % (roots, float(count) / roots))

def waterlevel(wl):
    ''' waterlevel wl : see how many word has higher frequency then waterlevel. '''
    ddb = segment.dictdb(dbname)
    # `wl` arrives as a command-line string; convert it once. The original
    # compared against an undefined name and raised NameError on every call.
    level = float(wl)
    # Count the values that are greater than or equal to the level.
    print(len([i for i in ddb.values() if i >= level]))

def scale(factor):
    ''' scale factor : scale dict with factor. '''
    store = segment.dictdb(dbname)
    multiplier = float(factor)
    # ddb.db maps a key to an inner mapping of word -> frequency; every
    # frequency is multiplied in place (existing keys only, so assigning
    # while iterating does not change the mapping's size).
    for _head, bucket in store.db.iteritems():
        for key, freq in bucket.iteritems():
            bucket[key] = freq * multiplier
    store.normalize()
    store.sync()

def shrink(threshold):
    ''' shrink threshold : remove words which has lower frequency then threshold. '''
    store = segment.dictdb(dbname)
    limit = float(threshold)
    emptied = []
    for head, bucket in store.db.iteritems():
        # Replace the inner mapping with one holding only the entries whose
        # frequency reaches the limit.
        kept = dict((key, freq) for key, freq in bucket.iteritems()
                    if freq >= limit)
        store.db[head] = kept
        if not kept:
            emptied.append(head)
    # Entries that ended up empty are deleted after the loop, because
    # deleting keys while iterating store.db is not safe.
    for head in emptied:
        del store.db[head]
    store.normalize()
    store.sync()

def flat():
    ''' flat : set every word's frequency to 1. '''
    store = segment.dictdb(dbname)
    # Overwrite every stored frequency with 1 in place, then let the db
    # normalize itself and sync.
    for _head, bucket in store.db.iteritems():
        for key, _old in bucket.iteritems():
            bucket[key] = 1
    store.normalize()
    store.sync()

def write_dict(words, outfile):
    ''' Write every word to outfile as a UTF-8 encoded "<word> 1" line. '''
    with open(outfile, 'w') as out:
        # Take a snapshot of the collection before emitting anything.
        snapshot = list(words)
        for entry in snapshot:
            line = u'%s 1\n' % entry
            out.write(line.encode('utf-8'))

def read_file(infile, func, words):
    ''' Feed each non-comment line of infile to func and collect the results.

    Lines starting with '#' are skipped.  func is called with the raw line;
    every truthy value it returns is added to the `words` set.
    '''
    with open(infile, 'r') as source:
        for raw in source:
            if not raw.startswith('#'):
                picked = func(raw)
                if picked:
                    words.add(picked)

def trans_cedict(infile, outfile):
    ''' trans_cedict infile outfile : translate cedict infile to outfile. '''
    words = set()
    # read_file() calls the extractor with the line only, so it must take
    # exactly one argument (the original two-argument form raised TypeError).
    def __inner(line):
        fields = line.split()
        # Blank or one-field lines have no second column to take.
        if len(fields) < 2: return
        word = fields[1].decode('utf-8')
        # Only entries longer than one character are kept.
        if len(word) > 1: return word
    read_file(infile, __inner, words)
    write_dict(words, outfile)

def trans_plain(infile, outfile):
    ''' trans_plain infile outfile : translate plain infile to outfile. '''
    words = set()
    # read_file() calls the extractor with the line only, so it must take
    # exactly one argument (the original two-argument form raised TypeError).
    def __inner(line):
        # Lines that are not valid UTF-8 are skipped on purpose; catch only
        # the decode failure so unrelated errors are not swallowed.
        try: word = line.decode('utf-8').strip()
        except UnicodeDecodeError: return
        # Only entries longer than one character are kept.
        if len(word) > 1: return word
    read_file(infile, __inner, words)
    write_dict(words, outfile)

# Names of the module-level functions exposed as sub-commands.  Each one's
# docstring is printed verbatim as its help line by main(), so those
# docstrings are user-facing text.
cmds = ['create', 'importdb', 'exportdb', 'add', 'remove',
        'lookup', 'cals', 'stat', 'waterlevel', 'scale',
        'shrink', 'flat', 'trans_cedict', 'trans_plain']
def main():
    ''' Parse the command line and run the requested sub-command.

    Usage: prog [-d dbfile] cmd params ...
    -d dbfile rebinds the module-level `dbname`.  With no command, the usage
    line and the help of every command are printed.  An unknown command is
    reported on stderr and the process exits with status 2.
    '''
    global dbname
    opts, args = getopt.getopt(sys.argv[1:], 'd:')
    for opt, val in opts:
        if opt == '-d':
            dbname = val
    if not args:
        print('%s cmds params ...' % sys.argv[0])
        for name in cmds:
            print('%s:%s' % (name, globals()[name].__doc__))
        return
    name = args[0]
    # Dispatch through the `cmds` whitelist instead of eval(): the command
    # name is raw command-line text and must not be evaluated as an
    # arbitrary Python expression.
    if name not in cmds:
        sys.stderr.write('unknown command: %s\n' % name)
        sys.exit(2)
    globals()[name](*args[1:])

# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__': main()
